*** Set paths
local dir "..."
local dropbox_data "..."
local dropbox_data_create "..."
local local_data "..."
local dropbox_fig "..."
local dropbox_tab "..."


************************************************************
*** COLLAPSE DATA TO AVOID DOUBLE COUNTING
************************************************************
	
collapse IIROC_PRICE quote quote_rel quote_0  share_won_of_dem share_won_of_dem_0 log_shares share_ratio  share_Ei share_E0 days_post_auction issuesum_tot, by(bidderid period  b_is_d large_trade)

******************************
*** FIGURE 5 
******************************

if($week ==0 & $homedealer ==0 & $keepall ==1){
	preserve
	gen bidderid3= bidderid
	replace bidderid3 = 46 if bidderid==61 | bidderid==94 | bidderid==18 

	gen share_Ei_norm = share_Ei*100 // in %
	binscatter   share_Ei_norm  share_won_of_dem , lcolor(black)  control(i.period i.bidderid3 )  ///
	msymbol(circle_hollow) yti("s{subscript:tj} in %", size(vlarge)) xti("won{subscript:tj} in mil C$", size(large)) ///
	xlabel(, labsize(large)) ylabel( 8 10 , labsize(large))  nquantiles(25)
	graph export "`dropbox_fig'/graph_cost_marketshares_withDfes_$week _$homedealer _$keepall.png", replace

	egen p99 = pctile(share_won_of_dem), p(90)
	binscatter   share_Ei_norm  share_won_of_dem if share_won_of_dem<p99 , lcolor(black)  control(i.period ) ///
	msymbol(circle_hollow) yti("s{subscript:tj} in %", size(vlarge)) xti("won{subscript:tj} in mil C$", size(large)) ///
	xlabel(, labsize(large)) ylabel( 0 15  , labsize(large)) ysc(r(0,15)) nquantiles(25)
	graph export "`dropbox_fig'/graph_cost_marketshares_withoutDfes_$week _$homedealer _$keepall.png", replace
	restore
}

************************************************************
*** IV REGRESSION -INPUT FOR APPENDIX TABLE A6  
************************************************************

*drop benchmark dealer 
drop if bidderid==$basedealer

*date and dealer dummies + choose base 
tabulate period, gen(var_)
if $week ==0 {
	ren var_371 base1 // use this date because I used it before (shouldn't matter)
}
if $week ==1 {
	ren var_1 base1
}

gen bidderid3= bidderid
replace bidderid3 = 46 if bidderid==61 | bidderid==94 | bidderid==18 
tabulate bidderid3, gen(bidder_)
ren  bidder_4 base2

label var share_won_of_dem "share won"
label var quote_rel "rel quote"

if($fej ==1){
	eststo clear
	eststo: ivreg2 log_shares var_* bidder_*  (quote_rel=share_won_of_dem)
	esttab using  "`dropbox_tab'/IV_reg1_$week _$homedealer _$keepall _$largetrade _$fej _$instrument.tex",  ci ar2  keep(quote_rel) wide nobaselevels label  replace 
	eststo clear
	eststo: reghdfe quote_rel  share_won_of_dem , absorb(period bidderid3) 
	predict zfs2, xb
	label var zfs2 "rel quote"
	eststo: reghdfe log_shares  quote_rel  , absorb(period bidderid3)
	eststo: reghdfe log_shares  zfs2   , absorb(period bidderid3 ) 
	esttab using  "`dropbox_tab'/IV_reg2_$week _$homedealer _$keepall _$largetrade _$fej _$instrument.tex", wide ci ar2 nobaselevels label mtitle("1st"  "OLS"   "IV")  nonumbers  replace 
	eststo clear

	reg quote_rel  share_won_of_dem var_*  bidder_* if share_won_of_dem!=. 
	predict zfs, xb
	reg log_shares  zfs   var_*   bidder_*
}
if($fej ==0){
	eststo clear
	eststo: ivreg2 log_shares var_*   (quote_rel=share_won_of_dem)
	esttab using  "`dropbox_tab'/IV_reg1_$week _$homedealer _$keepall _$largetrade _$fej _$instrument.tex",  ci ar2  keep(quote_rel) wide nobaselevels label  replace 
	eststo clear
	eststo: reghdfe quote_rel  share_won_of_dem , absorb(period ) 
	predict zfs2, xb
	label var zfs2 "rel quote"
	eststo: reghdfe log_shares  quote_rel  , absorb(period )
	eststo: reghdfe log_shares  zfs2   , absorb(period  ) 
	esttab using  "`dropbox_tab'/IV_reg2_$week _$homedealer _$keepall _$largetrade _$fej _$instrument.tex", wide ci ar2 nobaselevels label mtitle("1st"  "OLS"   "IV")  nonumbers  replace 
	eststo clear

	reg quote_rel  share_won_of_dem var_*  if share_won_of_dem!=. 
	predict zfs, xb
	reg log_shares  zfs   var_*  
}

*compute sigmahat
gen alphahat = _b[zfs] 
gen sigma 	 = 1/alphahat

*store the fixed effects	
gen fet=. 
ds  var_* 
foreach var in `r(varlist)'{
replace fet =  _b[`var']  if `var'==1 

}
replace fet =0 if base1==1
tab period if fet==0
	*--> should be only one if nothing dropped out due to colinearity

*dealer fixed effect
if $fej ==0{
	gen fej=0 
}
if $fej ==1{
	gen fej =.
	ds bidder_*
	foreach var in `r(varlist)'{
	replace fej =  _b[`var']  if `var'==1 
	}
	replace fej =0 if base2==1
	tab bidderid if fej==0
		*---> should be the omitted bidder
}

*get xij
predict resids, residuals
gen double xij = resids + _b[_cons] + fej + fet 
gen double constant =_b[_cons]
tabstat xij, stats(N p50 mean min max)


************************************************************
*** MERGE BACK WITH ALL DATA
************************************************************
		
keep period bidderid alphahat sigma xij   fet fej  constant

tempfile tempxi
save  `tempxi' , replace


use `temprest', clear
merge m:1 period bidderid using `tempxi'
drop if market_type=="auction"

	tab bidderid if _merge==1 

		*should be:
			*any trade by the baseline dealer
			*any trade by a dealer that is not selling on that day on CanDeal
			
	tab _merge
	drop _merge

	
*bring back the retail investors			
if ($keepall) ==0 {	
	append using "`local_data'/data_base_retailer_2022.dta"
}
	

************************************************************
*** PREPARE TO OUTSHEET FOR MATLAB 
************************************************************

*** A) Fill in missing sigma, quote, xij, EP  (where bidder did not trade on the platform)

*xij for baseline dealer 
replace xij =0  if bidderid==$basedealer

*fill in sigma  for the baseline dealer
egen a0 = max(sigma)
replace sigma = a0 if sigma==.
drop a0

*fill in missing dates (needed when assuming a dealer is active on platform whenever OTC)
bys bidderid (period): replace xij = xij[_n-1] if xij==.
count if xij==. & bidderid!=$basedealer
bys period: egen a0 = median(xij)
replace xij = a0 if xij ==.
drop a0 

*fill in sigma
egen a0 = max(sigma)
replace sigma = a0 if sigma==.
drop a0 

*fill constant_neg 
egen a0 = max(constant)
replace constant = a0 if constant==.
drop a0 

*normalize xij so that it matches the matlab code (and model as written)
replace xij = xij *sigma

	*how many quotes are missing when dealers trade OTC?
	count if quote==. 
	count if xij==.
	   
	*GRAPH:  xij's		 
	bys bidderid: egen med_xi = median(xij)
	graph box xij if b_is_d==1, over(bidderid, sort(med_xi) ///
	label(angle(60))) noout yti(basis points)  graphregion(color(white)) 
	graph export "`dropbox_fig'/graph_box_xij_$week _$homedealer _$keepall _$quantityrobust _$fej.png", replace

	*GRAPH:  xij's		 
	preserve 
	
	replace bidderid=46 if bidderid==46001
	replace bidderid=18 if bidderid==18001
	replace bidderid=24 if bidderid==24001
	collapse  xij fej  , by(bidderid  period)

	label define dealer_lab  46 "d1" 18 "d4" 24 "d5" 61 "d3" 94  "d2" 93 "d6" 77 "d7" 88 "d8" 67 "d0" 
	label values bidderid dealer_lab 
	
	*create variable with dealer size (as in matlab)
	gen d_size = .
	replace d_size =1 if bidderid==46
	replace d_size =2 if bidderid==94
	replace d_size =3 if bidderid==61
	replace d_size =4 if bidderid==18
	replace d_size =5 if bidderid ==24
	replace d_size =6 if bidderid==67 
	replace d_size =7 if bidderid==93
	replace d_size =8 if bidderid==77
	replace d_size =9 if bidderid==88
	
	graph box xij , over(bidderid, sort(d_size) label(labsize(vlarge)))  yti(basis points, size(vlarge) )  ///
	graphregion(color(white)) marker(1, msize(vsmall ) msymbol(circle_hollow ) mcolor(black%70)) ///
	graphregion(color(white)) ylabel(,labsize(large))  bar(1, fcolor(white) lcolor(black)) noout 
	graph export "`dropbox_fig'/graph_box_xijs_$week _$homedealer _$keepall _$quantityrobust _$fej_B.png", replace
	restore
	

*** B) Go onto the parent level (to obtain sufficient power per day)

*bidder id's
replace bidderid=46 if bidderid==46001
replace bidderid=18 if bidderid==18001
replace bidderid=24 if bidderid==24001

replace homedealerID = 46 if homedealerID==46001
replace homedealerID = 18 if homedealerID==18001
replace homedealerID = 24 if homedealerID==24001

*quote on parent level (filled in)
bys period bidderid IIROC_SIDE trading_venue: egen a0 = mean(IIROC_YIELD) if trading_venue=="CanDeal" 
bys period bidderid IIROC_SIDE: egen quote2 = max(a0)
drop a0 
bys bidderid IIROC_SIDE (period):    replace quote2 = quote2[_n-1] if quote2==.    //this doesn't change much in the regressions 
gen periodneg=-period
bys bidderid IIROC_SIDE (periodneg): replace quote2 = quote2[_n-1] if quote2==.	
count if quote2==.
replace quote = quote2 

	*check if quote is unique:
	preserve
	collapse IIROC_QUANTITY, by(bidderid IIROC_SIDE period quote)
	bys bidderid IIROC_SIDE period: gen a0=_N
	tab a0
	restore 	
	
	
*** C) Create variables and drop few missings to avoid problems when running the matlab code 

*use average theta per day
bys period: egen a0 = mean(midyield_bl)
replace midyield_bl=a0 
drop a0 

*drop empty midyield and xij
drop if midyield_bl==.
drop if IIROC_YIELD==.
count if xij==.	
	
*create baseline dealer
gen basedealer =  $basedealer

*create date-investortype-side grid for latlab code
drop id
egen id = group(period retailer IIROC_SIDE)
bys period retailer IIROC_SIDE bidderid large_trade: gen countingj = _n
bys period retailer IIROC_SIDE bidderid homedealer large_trade (exec_time): gen countingjH = _n

*create time grid 
gen a0 = exec_time_grid
egen a1 = min(exec_time_grid)
gen time_h = (a0 - a1) /(100*60*60) //hours
generate string_time = string( exec_time_grid,"%tc") 
drop a0 a1 

*make all string variables numeric:
sort bidderid period 
destring bidderid, replace

egen 	lei = group(LEI)
replace lei = 0 if LEI=="client"
replace lei = -999 if LEI=="NA"

gen 	venue2 =1  if trading_venue=="OTC"
replace venue2 = 2 if trading_venue=="CanDeal"

drop side 
gen  side 	 =  1 if IIROC_SIDE=="BUY"
replace side = -1 if IIROC_SIDE=="SELL"

gen retail_dummy = (retailer==1)

	*check that nothing that I need is missing	
	count if IIROC_PRICE==.
	count if IIROC_YIELD==.
	count if quote==.
	count if IIROC_QUANTITY==.

	count if midyield_bl==. 		    
	count if xij ==.

*cij (no longer used)
gen ctij = IIROC_YIELD - quote if trading_venue=="OTC"

*EP (no longer used)
gen EP = .

*create weight by the market sizes of the dealers (no longer used)
gen sharei	=0
gen Ei		=0

levelsof side, local(levels_sides) 
foreach l of local levels_sides {

*market share
bys bidderid  period IIROC_SIDE : gen a0 = _N  	if  market_type=="secondary" & side==`l'
replace Ei = a0
drop a0

*shareE_tsi as % of total platform market share (across dealers)
bys period IIROC_SIDE : gen a0 = _N  				if market_type=="secondary"  & side==`l'
replace sharei = Ei/a0 if side==`l'
drop a0
}

*Set to NaN what is not used in Matlab (to not get confused in Matlab code)
replace bidyield_can=.
replace askyield_can=.
replace bidyield_bl=.
replace askyield_bl=.

gen xijH = xij 

*sort 
sort id exec_time_grid 

browse id period retail_dummy side  midyield_bl large_trade askyield_can  IIROC_YIELD venue2 bidyield_bl askyield_bl  share_Ei share_OTCi xij sigma bidderid countingj quote feij ctij  basedealer if trading_venue=="OTC"

************************************************************
*** OUTSHEET FOR MATLAB
************************************************************

outsheet id period retail_dummy side  midyield_bl large_trade askyield_can  IIROC_YIELD venue2 bidyield_bl askyield_bl share_Ei share_OTCi xij sigma bidderid countingj quote feij ctij  basedealer homedealerID  xij xijH homedealer countingjH using "1_input/MatlabEstimationData_$week _$homedealer _$keepall _$quantityrobust _$largetrade _$fej.csv", replace 

